Nishita Chaudhary M00977896 Visual Coursework 3¶
In [10]:
#Importing the libraries used throughout the notebook.
from mtcnn.mtcnn import MTCNN #deep-learning based face detection
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from skimage.io import imread,imshow
from skimage.color import rgb2gray,rgb2hsv
from skimage.filters import threshold_otsu
Upload and display an Image¶
In [11]:
#Load the image with OpenCV: a 3-D array (height, width, channel) in BGR order,
#so the colours look swapped here until converted to RGB in the next cell.
# NOTE(review): cv2.imread returns None when the file is missing — no check is done here.
original_im= cv2.imread('nishitaa.jpg')
plt.imshow(original_im)
Out[11]:
<matplotlib.image.AxesImage at 0x26b1f4fe6a0>
In [12]:
#Convert BGR (OpenCV's channel order) to RGB so matplotlib shows true colours.
im =cv2.cvtColor(original_im,cv2.COLOR_BGR2RGB)
plt.imshow(im)
Out[12]:
<matplotlib.image.AxesImage at 0x26b1dbe9940>
Image PreProcessing¶
In [13]:
#Convert the RGB image to a single-channel (2-D) grayscale image.
im_gray =cv2.cvtColor(im,cv2.COLOR_RGB2GRAY)
plt.imshow(im_gray, cmap = 'gray')
# Intensity range check: the minimum is printed, the maximum is the cell's output.
print(im_gray.min())
im_gray.max()
0
Out[13]:
254
In [14]:
# Cast the array to unsigned 8-bit integers (presumably already uint8 after
# cvtColor, so this is a defensive no-op — TODO confirm). The bare expression
# on the last line displays the pixel array as the cell output.
im_gray = im_gray.astype(np.uint8) # converts the image array im_gray's data type to uint8, or unsigned 8-bit integer.
im_gray
Out[14]:
array([[ 92, 95, 97, ..., 147, 146, 145],
[ 93, 96, 98, ..., 144, 143, 142],
[ 94, 97, 100, ..., 139, 138, 137],
...,
[219, 228, 226, ..., 221, 227, 224],
[232, 228, 221, ..., 221, 228, 229],
[219, 226, 223, ..., 220, 217, 218]], dtype=uint8)
In [15]:
# Gaussian smoothing: convolve a normalised 3x3 Gaussian kernel over the grey
# image. The kernel weights come from a discretised bell curve (sum = 16, hence
# the division), and the blur suppresses pixel-level noise before edge detection.
Gauss = np.array([[1, 2, 1],
                  [2, 4, 2],
                  [1, 2, 1]]) / 16
G_img = cv2.filter2D(src=im_gray, ddepth=-1, kernel=Gauss)
plt.title("Gaussian Kernel")
plt.imshow(G_img, cmap='gray')
Out[15]:
<matplotlib.image.AxesImage at 0x26b1f79a880>
Edge detections Operators¶
In [16]:
#Normalise the smoothed image: dividing the uint8 values by 255 yields floats in [0, 1].
norm=G_img /255
1. Sobel Edge Detection¶
In [17]:
# First-order derivative (Sobel). Build the horizontal kernel and obtain the
# vertical one by a 90-degree rotation, then convolve both over the normalised image.
sobelx = np.array([[-1, 0, 1],
                   [-2, 0, 2],
                   [-1, 0, 1]])
sobely = np.rot90(sobelx)
sobelx_con = cv2.filter2D(norm, -1, sobelx)
sobely_con = cv2.filter2D(norm, -1, sobely)
# Gradient magnitude: Euclidean length of (Gx, Gy) at each pixel.
magnitude_sobel = np.sqrt(sobelx_con ** 2 + sobely_con ** 2)
# Gradient direction: per-pixel edge orientation angle via arctan2(Gy, Gx).
gradient_direction_sobel = np.arctan2(sobely_con, sobelx_con)
In [18]:
# Show the four Sobel results side by side in one row.
sobel_panels = [
    (sobelx_con, 'sobel X'),
    (sobely_con, 'Sobel Y'),
    (magnitude_sobel, 'Magnitude Sobel'),
    (gradient_direction_sobel, 'Direction Sobel'),
]
plt.figure(figsize=(20, 10))
for position, (panel, caption) in enumerate(sobel_panels, start=1):
    plt.subplot(1, 4, position)
    plt.imshow(panel, cmap='gray')
    plt.title(caption)
Out[18]:
Text(0.5, 1.0, 'Direction Sobel')
2. Canny Edge detection¶
In [19]:
#Canny edge detection implemented step by step (grayscale -> blur -> gradient ->
#non-maximum suppression -> hysteresis thresholding).
def canny_edge_detection_nishita(image_nishita, threshold_low, threshold_high):
    """Detect edges in a BGR image with a hand-rolled Canny pipeline.

    Parameters:
        image_nishita: BGR image as returned by cv2.imread.
        threshold_low: hysteresis threshold below which pixels are discarded.
        threshold_high: hysteresis threshold at/above which pixels are strong edges.

    Returns:
        uint8 edge map where 255 marks an edge pixel and 0 the background.
    """
    # Step 1: Convert the image to grayscale
    gray = cv2.cvtColor(image_nishita, cv2.COLOR_BGR2GRAY)
    # Step 2: Gaussian blur to suppress noise before differentiation
    remove_noise = cv2.GaussianBlur(gray, (5, 5), 0)
    # Step 3: Sobel gradients (fix: the y-gradient variable was misspelled "sbole_y")
    sobel_x = cv2.Sobel(remove_noise, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(remove_noise, cv2.CV_64F, 0, 1, ksize=3)
    # Step 4: Gradient magnitude at each pixel
    gradient_magnitude = np.sqrt(np.square(sobel_x) + np.square(sobel_y))
    # Step 5: Gradient direction at each pixel
    gradient_direction = np.arctan2(sobel_y, sobel_x)
    # Step 6: Non-maximum suppression — keep a pixel only if it is the local
    # maximum along its gradient direction (quantised into four sectors).
    M, N = gradient_magnitude.shape
    P = np.zeros((M, N), dtype=np.int32)  # NOTE(review): int32 truncates fractional magnitudes
    angle = gradient_direction * 180. / np.pi
    angle[angle < 0] += 180
    for i in range(1, M - 1):
        for j in range(1, N - 1):
            s, c = 255, 255
            if (0 <= angle[i, j] < 22.5) or (157.5 <= angle[i, j] <= 180):
                s = gradient_magnitude[i, j + 1]
                c = gradient_magnitude[i, j - 1]
            elif 22.5 <= angle[i, j] < 67.5:
                s = gradient_magnitude[i + 1, j - 1]
                c = gradient_magnitude[i - 1, j + 1]
            elif 67.5 <= angle[i, j] < 112.5:
                s = gradient_magnitude[i + 1, j]
                c = gradient_magnitude[i - 1, j]
            elif 112.5 <= angle[i, j] < 157.5:
                s = gradient_magnitude[i - 1, j - 1]
                c = gradient_magnitude[i + 1, j + 1]
            if (gradient_magnitude[i, j] >= s) and (gradient_magnitude[i, j] >= c):
                P[i, j] = gradient_magnitude[i, j]
            else:
                P[i, j] = 0
    # Step 7: Hysteresis — strong pixels (>= high) become edges immediately;
    # weak pixels (between the thresholds) survive only if a 3x3 neighbour is strong.
    edges = np.zeros_like(P)
    strong = np.int32(255)  # fix: removed the unused "weak" sentinel variable
    strong_i, strong_j = np.where(P >= threshold_high)
    zeros_i, zeros_j = np.where(P < threshold_low)
    edges[strong_i, strong_j] = strong
    edges[zeros_i, zeros_j] = 0
    for i in range(1, P.shape[0] - 1):
        for j in range(1, P.shape[1] - 1):
            if (threshold_low <= P[i, j] < threshold_high):
                neighbors = P[i-1:i+2, j-1:j+2]
                if np.max(neighbors) >= threshold_high:
                    edges[i, j] = strong
                else:
                    edges[i, j] = 0
    return edges.astype(np.uint8)
# Load the test image and run the detector with fixed hysteresis thresholds.
image_nishita = cv2.imread('nishitaa.jpg')
threshold_low = 25
threshold_high = 80
canny_edge = canny_edge_detection_nishita(image_nishita, threshold_low, threshold_high)

# Show the original photo next to the detected edges.
plt.figure(figsize=(10, 10))
plt.subplot(121)
plt.imshow(cv2.cvtColor(image_nishita, cv2.COLOR_BGR2RGB))
plt.title('Original Image Nishita')
plt.axis('off')
plt.subplot(122)
plt.imshow(canny_edge, cmap='gray')
plt.title('Canny Edge Detection Nishita')
plt.axis('off')
plt.show()
3. Prewitt Edge Detection¶
In [20]:
# The Prewitt operator is a discrete differentiation operator: it approximates
# the gradient of the image intensity, and each pixel of the result holds the
# corresponding gradient response.
px = np.array([[-1, 0, 1],
               [-1, 0, 1],
               [-1, 0, 1]])
py = np.rot90(px)
# Convolve both kernels over the normalised image.
px_con = cv2.filter2D(norm, -1, px)
py_con = cv2.filter2D(norm, -1, py)
# Gradient magnitude (Euclidean) and direction per pixel.
mag_prewitt = np.sqrt(px_con ** 2 + py_con ** 2)
gradient_direction_pewwit = np.arctan2(py_con, px_con)
In [21]:
# Show the four Prewitt results in one row.
prewitt_panels = [
    (px_con, 'Prewitt X'),
    (py_con, 'Prewitt Y'),
    (mag_prewitt, 'Magnitude'),
    (gradient_direction_pewwit, 'Direction'),
]
plt.figure(figsize=(20, 10))
for position, (panel, caption) in enumerate(prewitt_panels, start=1):
    plt.subplot(1, 4, position)
    plt.imshow(panel, cmap='gray')
    plt.title(caption)
Out[21]:
Text(0.5, 1.0, 'Direction')
4. Roberts Edge Detection¶
In [22]:
#First-order derivative operator: two 2x2 convolution masks, one being the other
#rotated by 90 degrees (closely related to the Sobel operator).
kernelx = np.array([[1,0],[0,-1]], dtype=np.float32)
kernely = np.rot90(kernelx)
robertsx = cv2.filter2D(norm , -1, kernelx)
robertsy = cv2.filter2D(norm , -1, kernely)
# Gradient magnitude per pixel.
roberts = np.sqrt(np.power(robertsx,2) + np.power(robertsy,2))
# Fix: np.arctan2 takes (y, x); the original passed (robertsx, robertsy), which
# was inconsistent with the Sobel/Prewitt/Scharr cells in this notebook.
gradient_direction_robert = np.arctan2(robertsy, robertsx)
In [23]:
# Show the Roberts responses: both kernels, then magnitude and direction.
roberts_panels = [
    (robertsx, 'Horizontal edge'),
    (robertsy, 'vertical edge'),
    (roberts, 'Magnitude'),
    (gradient_direction_robert, 'Direction'),
]
plt.figure(figsize=(20, 10))
for position, (panel, caption) in enumerate(roberts_panels, start=1):
    plt.subplot(1, 4, position)
    plt.imshow(panel, cmap='gray')
    plt.title(caption)
Out[23]:
Text(0.5, 1.0, 'Direction')
5. Scharr Edge Detection¶
In [24]:
#The Scharr operator approximates the first-order derivatives of the image
#intensity with 3x3 convolution kernels; it is widely used where accurate
#gradients matter (e.g. medical imaging and object recognition).
# Fix: the bottom row of the x-kernel was [-1, 0, 1]; the Scharr kernel is
# symmetric: [[-3, 0, 3], [-10, 0, 10], [-3, 0, 3]].
sx = np.array([[-3, 0, 3], [-10, 0, 10], [-3, 0, 3]])
sy = np.rot90(sx)
# Convolve both kernels over the normalised image.
sx_con = cv2.filter2D(norm, -1, sx)
sy_con = cv2.filter2D(norm, -1, sy)
# Gradient magnitude and direction per pixel.
mag_scharr = np.sqrt(np.square(sx_con) + np.square(sy_con))
gradient_direction_scharr = np.arctan2(sy_con, sx_con)
# Show both responses plus magnitude and direction (titles fixed: "Scharr").
plt.figure(figsize=(20, 10))
plt.subplot(1, 4, 1)
plt.imshow(sx_con, cmap='gray')
plt.title('Scharr X')
plt.subplot(1, 4, 2)
plt.imshow(sy_con, cmap='gray')
plt.title('Scharr Y')
plt.subplot(1, 4, 3)
plt.imshow(mag_scharr, cmap='gray')
plt.title('Scharr Magnitude')
plt.subplot(1, 4, 4)
plt.imshow(gradient_direction_scharr, cmap='gray')
plt.title('Scharr direction')
Out[24]:
Text(0.5, 1.0, ' Scharf direction')
6. Laplacian Edge Detection¶
In [25]:
#The Laplacian is a second-order differential operator: the sum of the second
#partial derivatives of the image intensity.
# Fix: the original kernel [[1, 0], [0, -1]] is a Roberts cross (first-order),
# not a Laplacian; use the standard 3x3 discrete Laplacian instead.
l_kernal = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]])
# Apply to the Gaussian-smoothed image and to the plain grayscale image.
# NOTE(review): ddepth=-1 keeps uint8 output, so negative responses are clipped.
laplacian_g= cv2.filter2D(G_img, -1, l_kernal )
laplacian_gray= cv2.filter2D(im_gray, -1, l_kernal )
# Compare against the original image and the Sobel magnitude.
plt.figure(figsize=(20, 10))
plt.subplot(141)
plt.imshow(im, cmap='gray')
plt.title('original image')
plt.subplot(142)
plt.imshow(magnitude_sobel, cmap = 'gray')
plt.title('sobel')
plt.subplot(143)
plt.imshow(laplacian_g, cmap = 'gray')
plt.title('Laplacian gaussian image')
plt.subplot(144)
plt.imshow(laplacian_gray, cmap='gray')
plt.title('Laplacian grey image')
Out[25]:
Text(0.5, 1.0, 'Laplacian grey image')
Image Segmentation¶
In [26]:
# NOTE(review): these imports duplicate the ones at the top of the notebook;
# kept so this section can run stand-alone.
from skimage.filters import threshold_otsu
import numpy as np
import matplotlib.pyplot as plt
import cv2
from skimage.io import imread,imshow
from skimage.color import rgb2gray,rgb2hsv
#Reload the image with skimage (RGB channel order) and display it.
my_image = imread("nishitaa.jpg")
imshow(my_image)
#Convert to grayscale; rgb2gray returns floats, not 8-bit integers.
image_g = rgb2gray(my_image)
imshow(image_g)
Out[26]:
<matplotlib.image.AxesImage at 0x26b30cd8b50>
In [27]:
#Histogram of the grayscale intensities, binned into 256 buckets.
#(image_g holds floats in [0, 1] — rgb2gray output — so the manual threshold
#below is also expressed in that range, not as an 8-bit value.)
plt.hist(image_g.ravel(),bins=256)
plt.show()
#Manually chosen threshold: pixels brighter than 0.3 become foreground (True).
thresh = 0.3
segmented_image = image_g > thresh
In [28]:
# Otsu's method chooses the threshold automatically from the image histogram.
thersh_otsu = threshold_otsu(image_g)
print(thersh_otsu)
segmented_image_otsu = image_g > thersh_otsu
imshow(segmented_image_otsu)
# Original grayscale image (with colour bar) next to both segmentations.
fig,ax = plt.subplots(1,3,figsize=(15,5))
# Fix: the original rebound the global name "im" (the RGB image used by later
# cells) to a matplotlib AxesImage handle; use a dedicated name instead.
gray_plot = ax[0].imshow(image_g,cmap='gray')
fig.colorbar(gray_plot,ax=ax[0])
ax[0].set_title("Original Grayscale image")
# Segmentation from the manually chosen threshold.
ax[1].imshow(segmented_image,cmap='gray')
ax[1].set_title("manual Threshold")
# Segmentation from the Otsu threshold.
ax[2].imshow(segmented_image_otsu,cmap='gray')
ax[2].set_title("Otsu Threshold")
0.3909217532169118
Out[28]:
Text(0.5, 1.0, 'Otsu Threshold')
Color Thresholding¶
In [29]:
from skimage.color import rgb2hsv

# Load and display the image with skimage.
my_image = imread("nishitaa.jpg")
plt.imshow(my_image)
plt.title("Original Image")
plt.axis('off')
plt.show()

# Convert to HSV and split into hue, saturation and value channels.
image_hsv = rgb2hsv(my_image)
image_hsv_h = image_hsv[:, :, 0]
image_hsv_s = image_hsv[:, :, 1]
image_hsv_v = image_hsv[:, :, 2]

# Plot the three channels side by side.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
# Fix: draw the hue channel once and reuse the mappable for the colour bar
# (the original called imshow on ax[0] a second time just for the colorbar).
hue_plot = ax[0].imshow(image_hsv_h, cmap='hsv')
ax[0].set_title("Hue")
cbar = fig.colorbar(hue_plot, ax=ax[0])
ax[1].imshow(image_hsv_s, cmap='hsv')
ax[1].set_title("Saturation")
ax[2].imshow(image_hsv_v, cmap='hsv')
ax[2].set_title("Value")
for a in ax:
    a.axis('off')
plt.show()

# Hue-based mask for red tones.
# NOTE(review): red hue also wraps around near 1.0; this range keeps only
# hues in (0, 0.1) — confirm whether wrap-around reds should be included.
lower_mask = image_hsv[:, :, 0] > 0.0
upper_mask = image_hsv[:, :, 0] < 0.1
mask = lower_mask * upper_mask

# Zero out all three channels outside the mask.
segmented_image = my_image.copy()
segmented_image[:, :, 0] *= mask  # Red channel
segmented_image[:, :, 1] *= mask  # Green channel
segmented_image[:, :, 2] *= mask  # Blue channel

# Show the masked (segmented) image.
plt.imshow(segmented_image)
plt.title("Segmented Image")
plt.axis('off')
plt.show()
Dlib Library¶
In [30]:
import dlib

# Read the group photo and convert it to grayscale for the detector.
imagegroup = cv2.imread("group.jpg")
gray_group = cv2.cvtColor(imagegroup, cv2.COLOR_BGR2GRAY)

# dlib's frontal face detector (HOG based); the second argument 1 means the
# image is upsampled once before detection.
hogFaceDetector = dlib.get_frontal_face_detector()
faces = hogFaceDetector(gray_group, 1)

# Outline every detection directly with its corner coordinates.
for rect in faces:
    top_left = (rect.left(), rect.top())
    bottom_right = (rect.right(), rect.bottom())
    cv2.rectangle(imagegroup, top_left, bottom_right, (0, 255, 0), 2)

# Convert to RGB for display and show the annotated result.
imagegroup = cv2.cvtColor(imagegroup, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(20, 10))
plt.imshow(imagegroup,cmap='gray')
Out[30]:
<matplotlib.image.AxesImage at 0x26b323e51f0>
In [31]:
from skimage import feature, exposure

# Grayscale copy of the portrait for HOG extraction.
image = cv2.imread("nishitaa.jpg")
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# feature.hog returns (feature_vector, visualisation_image) when visualize=True;
# the original names suggested both were images, so they are renamed here.
hog_features, hog_viz = feature.hog(image_gray, orientations=19, pixels_per_cell=(10, 10),
                                    cells_per_block=(10, 10), block_norm="L1", visualize=True)

# Distribution of the visualisation intensities.
plt.hist(hog_viz.flatten(), bins=256)
plt.show()

# Stretch the intensities so the HOG structure is visible.
hog_image_rescaled = exposure.rescale_intensity(hog_viz, in_range=(0, 3), out_range=(0, 255))
plt.imshow(hog_image_rescaled, cmap='gray')
plt.show()

# OpenCV HOG descriptor with the default people detector (pedestrian detection).
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
locations, _ = hog.detectMultiScale(image, winStride=(6, 6), padding=(8, 8), scale=1)

# Box every detection.
for (x, y, w, h) in locations:
    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)

# Show the annotated image in RGB.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()
K Means Clustering¶
An effective unsupervised machine learning technique for grouping data into clusters is k-means clustering. When the number of clusters is known in advance or can be accurately anticipated, it works especially well.
In [35]:
from sklearn.cluster import KMeans

# RGB copy of the photo for colour clustering.
im =cv2.cvtColor(original_im,cv2.COLOR_BGR2RGB)
# Flatten to an (n_pixels, 3) float32 array, the layout cv2.kmeans expects.
pixel_no = im.reshape((-1, 3))
pixel_no = np.float32(pixel_no)
# Stop after 100 iterations or when the centres move by less than 0.85.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.85)
# Maximum number of clusters to try.
max_clusters = 10

# Inertia (within-cluster sum of squares) for each candidate k.
# Fix: use the compactness returned by cv2.kmeans, which is exactly the sum of
# squared distances to the centres; the previous formula took a min over the
# three colour channels instead of summing them.
inertia_values = []
for k in range(1, max_clusters + 1):
    compactness, labels, centers = cv2.kmeans(pixel_no, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    inertia_values.append(compactness)

# Plot the elbow curve.
plt.plot(range(1, max_clusters + 1), inertia_values, marker='o')
plt.title('Elbow Method for Optimal K')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Inertia')
plt.grid(True)
plt.show()

# Fix: inertia decreases monotonically with k, so taking argmin always picked
# max_clusters. The elbow is instead the point farthest from the straight line
# (chord) joining the first and last points of the normalised curve.
ks = np.arange(1, max_clusters + 1)
inertias = np.asarray(inertia_values, dtype=np.float64)
k_norm = (ks - ks[0]) / (ks[-1] - ks[0])
i_norm = (inertias - inertias[-1]) / (inertias[0] - inertias[-1])
# Perpendicular distance from each normalised point to the chord x + y = 1.
distance_to_chord = np.abs(k_norm + i_norm - 1) / np.sqrt(2)
optimal_k = int(ks[np.argmax(distance_to_chord)])

# Final clustering with the chosen k (k-means++ initialisation).
compact, labels, centers = cv2.kmeans(pixel_no, optimal_k, None, criteria, 10, cv2.KMEANS_PP_CENTERS)
centers = np.uint8(centers)
# Replace every pixel by the colour of its cluster centre.
segmented_im = centers[labels.flatten()]
segmented_im = segmented_im.reshape(im.shape)

# Show the original next to the segmented result.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
axs[0].imshow(im)
axs[0].set_title('Original Image')
axs[0].axis('off')
axs[1].imshow(segmented_im)
axs[1].set_title(f'Segmented Image with {optimal_k} clusters')
axs[1].axis('off')
plt.show()
Face Detection¶
1.Using Deep Learning
In [36]:
from mtcnn.mtcnn import MTCNN

# Fresh RGB copy so annotations are drawn on a clean image.
im =cv2.cvtColor(original_im,cv2.COLOR_BGR2RGB)
model_deep = MTCNN()
print(model_deep)
output_image = model_deep.detect_faces(im)
face_count = len(output_image)

for detection in output_image:
    box = detection['box']
    conf = detection['confidence']
    keypoints = detection['keypoints']
    # Annotate only confident detections.
    if conf > 0.6:
        x, y, w, h = box
        # Bounding box around the face.
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 6)
        # Facial landmarks: eyes, nose and both mouth corners.
        cv2.circle(im, keypoints['left_eye'], 2, (255, 0, 0), 6)
        cv2.circle(im, keypoints['right_eye'], 2, (255, 0, 0), 6)
        cv2.circle(im, keypoints['nose'], 2, (0, 0, 255), 6)
        cv2.circle(im, keypoints['mouth_left'], 2, (0, 255, 255), 6)
        cv2.circle(im, keypoints['mouth_right'], 2, (0, 255, 255), 6)

plt.imshow(im)
plt.show()
<mtcnn.mtcnn.MTCNN object at 0x0000026B1FB50850> 1/1 [==============================] - 0s 286ms/step WARNING:tensorflow:5 out of the last 18 calls to <function Model.make_predict_function.<locals>.predict_function at 0x0000026B1F575040> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/1 [==============================] - 0s 263ms/step 1/1 [==============================] - 0s 74ms/step 1/1 [==============================] - 0s 83ms/step 1/1 [==============================] - 0s 51ms/step 1/1 [==============================] - 0s 60ms/step 1/1 [==============================] - 0s 50ms/step 1/1 [==============================] - 0s 52ms/step 1/1 [==============================] - 0s 64ms/step 1/1 [==============================] - 0s 52ms/step 6/6 [==============================] - 0s 9ms/step 1/1 [==============================] - 0s 349ms/step
2. Using haarcascade
In [37]:
#Drawings were accumulating across cells, so re-create "im" from the original image.
im =cv2.cvtColor(original_im,cv2.COLOR_BGR2RGB)
# Haar cascades shipped with OpenCV for face and eye detection
# (pre-trained classifiers loaded from cv2's bundled data directory).
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')

def detect_faces_and_eyes(im):
    """Draw rectangles around detected faces and circles around eyes, then display.

    Fix: the original ran detection on the module-level ``im_gray`` regardless of
    the image passed in; the grayscale image is now derived from the argument.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
    face_detect = face_cascade.detectMultiScale(gray, 1.1, 4)
    print('Number of faces detected :', len(face_detect))
    for (x, y, w, h) in face_detect:
        # (x, y) is the top-left corner; w and h are the box width and height.
        cv2.rectangle(im, (x, y), (x+w, y+h), (255, 0, 0), 2)
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = im[y:y+h, x:x+w]
        # Detect eyes only inside the face region of interest.
        eyes = eye_cascade.detectMultiScale(roi_gray)
        for (ex, ey, ew, eh) in eyes:
            # Centre of the eye: face offset plus eye offset plus half the box size.
            center = (x + ex + ew // 2, y + ey + eh // 2)
            # Circle radius from the smaller side of the eye bounding box.
            radius = min(ew, eh) // 2
            cv2.circle(im, center, radius, (0, 255, 255), 2)
    # Show the annotated image.
    plt.imshow(im)
    plt.axis('off')
    plt.show()

# Run the detector on the freshly converted RGB image.
detect_faces_and_eyes(im)
Number of faces detected : 1
Determining Humans and Not humans¶
In [79]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Data generators rescale pixel values into [0, 1].
train_datagen = ImageDataGenerator(rescale=1/255)
validate_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)

# flow_from_directory treats each sub-folder as one class (two classes here).
# NOTE(review): absolute Windows paths make this cell machine-specific;
# prefer a configurable data directory.
train_data = train_datagen.flow_from_directory("C:\\Users\\nishu\\Downloads\\image processing\\training",
                                               target_size=(200, 200), # resize every image to a fixed size
                                               batch_size=3,
                                               class_mode='binary')
validate_data = validate_datagen.flow_from_directory("C:\\Users\\nishu\\Downloads\\image processing\\validate",
                                                     target_size=(200, 200),
                                                     batch_size=3,
                                                     class_mode='binary')
test_data = test_datagen.flow_from_directory("C:\\Users\\nishu\\Downloads\\image processing\\test",
                                             target_size=(200, 200),
                                             batch_size=1,
                                             class_mode='binary')

# Small CNN: three conv/pool stages, then two dense layers; sigmoid output
# because class_mode='binary'.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(200, 200, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
# Binary cross-entropy loss with the Adam optimizer; accuracy as the metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 10 epochs, validating after each epoch.
history = model.fit(train_data, steps_per_epoch=len(train_data), epochs=10, validation_data=validate_data,
                    validation_steps=len(validate_data))
# Evaluate on the held-out test generator.
loss, accuracy = model.evaluate(test_data, steps=len(test_data))
print("Testing Accuracy:", accuracy)

# One sample batch from the test generator (batch_size=1).
sample_testing_images, sample_testing_labels = next(test_data)
# Fix: derive the index -> name mapping from the generator instead of the
# hardcoded {0: 'Human', 1: 'Not Human'} — flow_from_directory assigns indices
# alphabetically, so a hardcoded map can silently be inverted.
class_labels = {index: name for name, index in train_data.class_indices.items()}

plt.figure(figsize=(5, 5))
# Sigmoid output: <= 0.5 maps to class 0, otherwise class 1.
predicted_label = model.predict(sample_testing_images)[0][0]
predicted_class = 0 if predicted_label <= 0.5 else 1
# Show the sample image with its predicted class name.
plt.imshow(sample_testing_images[0])
plt.title(f"Predicted Label: {class_labels[predicted_class]}")
plt.axis('off')
plt.show()
Found 15 images belonging to 2 classes. Found 13 images belonging to 2 classes. Found 8 images belonging to 2 classes. Epoch 1/10 5/5 [==============================] - 12s 1s/step - loss: 2.8384 - accuracy: 0.5333 - val_loss: 1.6906 - val_accuracy: 0.3846 Epoch 2/10 5/5 [==============================] - 5s 1s/step - loss: 0.9514 - accuracy: 0.6000 - val_loss: 1.0494 - val_accuracy: 0.3846 Epoch 3/10 5/5 [==============================] - 6s 1s/step - loss: 0.6500 - accuracy: 0.6667 - val_loss: 0.7422 - val_accuracy: 0.6923 Epoch 4/10 5/5 [==============================] - 6s 1s/step - loss: 0.1260 - accuracy: 1.0000 - val_loss: 0.8385 - val_accuracy: 0.5385 Epoch 5/10 5/5 [==============================] - 7s 1s/step - loss: 0.1132 - accuracy: 1.0000 - val_loss: 1.3846 - val_accuracy: 0.5385 Epoch 6/10 5/5 [==============================] - 7s 1s/step - loss: 0.0274 - accuracy: 1.0000 - val_loss: 1.2052 - val_accuracy: 0.6154 Epoch 7/10 5/5 [==============================] - 7s 2s/step - loss: 0.0424 - accuracy: 1.0000 - val_loss: 1.2500 - val_accuracy: 0.5385 Epoch 8/10 5/5 [==============================] - 6s 1s/step - loss: 0.0217 - accuracy: 1.0000 - val_loss: 0.8305 - val_accuracy: 0.6923 Epoch 9/10 5/5 [==============================] - 5s 1s/step - loss: 0.0021 - accuracy: 1.0000 - val_loss: 0.8777 - val_accuracy: 0.5385 Epoch 10/10 5/5 [==============================] - 5s 1s/step - loss: 0.0037 - accuracy: 1.0000 - val_loss: 0.9877 - val_accuracy: 0.5385 8/8 [==============================] - 0s 42ms/step - loss: 2.5534 - accuracy: 0.5000 Testing Accuracy: 0.5 1/1 [==============================] - 0s 412ms/step
The model above could be made more accurate by using a grid search over hyperparameters to select a suitable layer configuration, and by training and testing on a much larger dataset.
Video Face Detection Using haarcascades¶
In [ ]:
#Real-time face detection on the webcam stream using OpenCV's Haar cascade.
# Pre-trained frontal-face cascade shipped with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# Device index 0 selects the default webcam.
capture = cv2.VideoCapture(0)

while True:
    grabbed, frame = capture.read()
    # Stop when the stream ends or the camera fails.
    if not grabbed:
        break
    # The cascade operates on grayscale frames.
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    detections = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    # Outline every detected face on the colour frame.
    for (x, y, w, h) in detections:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # Show the annotated frame; pressing 'q' quits the preview loop.
    cv2.imshow('Video', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Free the camera and close the preview windows.
capture.release()
cv2.destroyAllWindows()
cv2.waitKey()